mask_siteid_sampling <- site_protocol_quanti[
  site_protocol_quanti$variable == "year" &
    site_protocol_quanti$n >= 10,
  ]$siteid

mask_siteid_protocol <- site_protocol_quali[
  site_protocol_quali$unitabundance %in% c("Count", "Ind.100m2"), ]$siteid

mask_siteid <- mask_siteid_sampling[mask_siteid_sampling %in% mask_siteid_protocol]
trends_data <- abun_rich_op %>%
  left_join(op_protocol, by = "op_id") %>%
  filter(siteid %in% mask_siteid) %>%
  mutate(
    log_total_abundance = log(total_abundance),
    log_species_nb = log(species_nb)
  )
plot_community_data <- function(dataset = NULL, y = NULL, x = NULL, title = NULL) {

  p <- dataset %>%
    ggplot(aes_string(y = y, x = x)) +
    geom_point() +
    geom_smooth(method = "loess", formula = "y ~ x")

  if (!is.null(title)) {
    p <- p +
      labs(title = title)
  }

  return(p)
}

plot_trends <- trends_data %>%
  group_by(siteid) %>%
  nest() %>%
  ungroup() %>%
  slice_sample(n = 100) %>%
  mutate(
    p_abun = map2(data, siteid,
      ~plot_community_data(
        dataset = .x, y = "total_abundance", x = "year", title = .y)),
    p_rich = map2(data, siteid,
      ~plot_community_data(
        dataset = .x, y = "species_nb", x = "year", title = .y),
    )
  )

0.1 Total abundance

n_plot_by_batch <- 8
map(
  split(
    seq_len(nrow(plot_trends)),
    1:floor(nrow(plot_trends) / n_plot_by_batch) + 1),
  ~plot_grid(plotlist = plot_trends[.x, ]$p_abun)
  )
#> Warning in split.default(seq_len(nrow(plot_trends)), 1:floor(nrow(plot_trends)/
#> n_plot_by_batch) + : la taille de données n'est pas un multiple de la variable
#> découpée
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : pseudoinverse used at 2007
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : neighborhood radius 2
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : reciprocal condition number 0
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
#> 2007
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 2
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
#> number 0
#> $`2`

#> 
#> $`3`

#> 
#> $`4`

#> 
#> $`5`

#> 
#> $`6`

#> 
#> $`7`

#> 
#> $`8`

#> 
#> $`9`

#> 
#> $`10`

#> 
#> $`11`

#> 
#> $`12`

#> 
#> $`13`

0.2 Species richness


map(
  split(
    seq_len(nrow(plot_trends)),
    1:floor(nrow(plot_trends) / n_plot_by_batch) + 1
    ),
  ~plot_grid(plotlist = plot_trends[.x, ]$p_rich)
  )
#> Warning in split.default(seq_len(nrow(plot_trends)), 1:floor(nrow(plot_trends)/
#> n_plot_by_batch) + : la taille de données n'est pas un multiple de la variable
#> découpée
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : pseudoinverse used at 2007
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : neighborhood radius 2
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : reciprocal condition number 0
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
#> 2007
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 2
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
#> number 0
#> $`2`

#> 
#> $`3`

#> 
#> $`4`

#> 
#> $`5`

#> 
#> $`6`

#> 
#> $`7`

#> 
#> $`8`

#> 
#> $`9`

#> 
#> $`10`

#> 
#> $`11`

#> 
#> $`12`

#> 
#> $`13`

0.3

tar_load(toy_dataset)
unique(toy_dataset$siteid)
#> [1] "S8633"  "S11138" "S534"   "S529"   "S11219"
plot_temporal_biomass <- function (bm_data = NULL, biomass_var = NULL, com = NULL, .log = FALSE) {

  #main_title <- paste0("Stab = ", round(1/(sync$cv_com), 2),", ", "Sync = ",
    #round(sync$synchrony, 2),", ", "CVsp = ", round(sync$cv_sp, 2))
  sym_bm_var <- rlang::sym(biomass_var)
  # Total
  total_biomass <- bm_data %>% 
  group_by(date) %>%
  summarise(!!sym_bm_var := sum(!!sym_bm_var, na.rm = FALSE))
  
  p <- bm_data %>%
    mutate(label = if_else(date == max(date), as.character(species), NA_character_)) %>%
  ggplot(aes_string(x = "date", y = biomass_var, color = "species")) + 
  geom_line() +
  lims(y = c(0, max(total_biomass[[biomass_var]]))) +
  labs(
  #title = main_title, subtitle = paste0("Station: ", station),
    y = "Biomass (g)", x = "Sampling date"
  ) +
  ggrepel::geom_label_repel(aes(label = label),
    size = 2.5, nudge_x = 1, na.rm = TRUE) 
  
  # Add total biomass
  p2 <- p +
    geom_line(data = total_biomass, aes(color = "black", size = 3)) +
    theme(legend.position = "none")

  # Add summary: richness, connectance, stab, t_lvl, sync, cv_sp 
  com %<>%
    mutate_if(is.double, round(., 2))

  label <- paste(
    "S = ", com$bm_std_stab,
    "sync = ", com$sync,
    "CVsp = ", com$cv_sp,
    "R = ", com$rich_tot_std,
    "C = ", com$ct,
    "Tlvl = ", com$t_lvl
  ) 

  p3 <- p2 +
    annotate("text", x = median(total_biomass$date),
      y = 15, label = label)

  if (.log) {
    p3 <- p3 + scale_y_log10() 
  }

  return(p3)
}

ti <- toy_dataset %>%
  filter(siteid == unique(toy_dataset$siteid)[2])
plot_population <- function (dataset = NULL, y_var = NULL, time_var = NULL) {

  sym_y_var <- rlang::sym(y_var)
  sym_time_var <- rlang::sym(time_var)
  # Total
  total_dataset <- dataset %>%
  group_by(!!sym_time_var) %>%
  summarise(!!sym_y_var := sum(!!sym_y_var, na.rm = FALSE))
  
  p <- dataset %>%
    mutate(label = if_else(!!sym_time_var == max(!!sym_time_var), as.character(species), NA_character_)) %>%
  ggplot(aes_string(x = time_var, y = y_var, color = "species")) + 
  geom_line() +
  lims(y = c(0, max(total_dataset[[y_var]]))) +
  labs(
  #title = main_title, subtitle = paste0("Station: ", station),
    y = "Biomass (g)", x = "Sampling time_var"
  ) +
  ggrepel::geom_label_repel(aes(label = label),
    size = 2.5, nudge_x = 1, na.rm = TRUE)
  
  # Add total biomass
  p2 <- p +
    geom_line(data = total_dataset, aes(color = "black", size = 3)) +
    theme(legend.position = "none")
  return(p2)

}

plot_population(dataset = ti, y_var = "abundance", time_var = "year")
#> Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps


#' replace NA by 0 in abundance data 
#'
#' @param  dataset data.frame
#' @param  y_var chr
#' @param  species_var chr
#' @param  time_var  chr
#' @param  long_format lgl
#'
#' @examples
#'tar_load(toy_dataset)
#'ti <- toy_dataset %>%
#'  filter(siteid == unique(toy_dataset$siteid)[2])
#'get_com_matrix_from_site(dataset = ti, y_var = "abundance")
get_com_matrix_from_site <- function(
  dataset = NULL,
  y_var = NULL,
  species_var = "species",
  time_var = "year",
  long_format = TRUE

) {

  com <- dataset[, c(time_var, species_var, y_var)]

  species <- unique(com[[species_var]])

  com <- com %>%
    pivot_wider(names_from = species_var, values_from = y_var) %>%
    mutate(across(species, ~replace(.x, is.na(.x), 0))) %>%
    arrange(!!sym(time_var)) %>%
    complete(!!sym(time_var) := full_seq(!!sym(time_var), 1))

  if (long_format) {
    com <- com %>%
      pivot_longer(cols = species, names_to = species_var, values_to = y_var)
  }
  return(com)
}

get_com_matrix_from_site(dataset = ti, y_var = "abundance")
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(species_var)` instead of `species_var` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(y_var)` instead of `y_var` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> Note: Using an external vector in selections is ambiguous.
#> ℹ Use `all_of(species)` instead of `species` to silence this message.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> # A tibble: 400 × 3
#>     year species                 abundance
#>    <dbl> <chr>                       <dbl>
#>  1  1997 Catostomus commersonii         63
#>  2  1997 Clinostomus funduloides        21
#>  3  1997 Etheostoma flabellare          14
#>  4  1997 Etheostoma olmstedi            20
#>  5  1997 Lepomis auritus                 2
#>  6  1997 Lepomis gibbosus                0
#>  7  1997 Notropis hudsonius              0
#>  8  1997 Pimephales notatus              0
#>  9  1997 Rhinichthys atratulus         119
#> 10  1997 Semotilus atromaculatus        69
#> # … with 390 more rows

plot_temporal_population <- function(
  com = NULL,
  y_var = "abundance",
  time_var = "year",
  species_var = "species",
  stacked = TRUE,
  ribbon = FALSE,
  .log = FALSE,
  color_species = NULL,
  label = NULL,
  label_parsed = FALSE,
  label_size = 4.5,
  y_label = NULL,
  my_ylim = NULL) {

  # get bm dynamic
  com <- get_com_matrix_from_site(
    dataset = com,
    y_var = y_var,
    species_var = species_var,
    time_var = time_var,
    long_format = TRUE
  )

  # get total y
  total <- com %>%
    group_by(!!sym(time_var)) %>%
    summarise(!!sym(y_var) != sum(!!sym(y_var)), .groups = "drop") %>%
    mutate(!!sym(species_var) := "Total") 

  p <- com %>%
    ggplot(aes_string(x = time_var, y = y_var, color = species_var))



  if (stacked) {
    if (ribbon) {
      com <- arrange(com, !!sym(time_var), !!sym(species_var)) 
      com$ymax <- com[[y_var]]
      com$ymin <- 0
      zl <- unique(com[[species_var]])
      for (i in 2:length(zl)) {
        zi <- com[[species_var]] == zl[i]
        zi_1 <- com[[species_var]] == zl[i - 1]
        com$ymin[zi] <- com$ymax[zi_1]
        com$ymax[zi] <- com$ymin[zi] + com$ymax[zi]
      }

      p <- com %>%
        ggplot(
          aes_string(
          x = time_var,
          y = y_var,
          ymax = "ymax",
          ymin = "ymin",
          fill = species_var)
          ) + geom_ribbon()
    } else {
      p <- p +
        geom_area(aes_string(fill = species_var))

    }
  } else {
    p <- p +
      geom_line() +
      geom_line(data = total, color = "black")
  }

  if (!is.null(my_ylim)) {
    p <- p +
      ylim(my_ylim)
  }

  # Make it professional:
  p <- p +
    labs(y = expression(Abundance), x = "Year")

#  if (!is.null(sem_df)) {
#    label <- get_network_summary(com = sem_df, station = station)
#  }

  if (!is.null(label)) {
    if (is.null(y_label)) {
      y_label <- max(total[[y_var]]) + max(total[[y_var]]) * 5 / 100
    }

    p <- p +
    annotate("text", x = median(total[[year]]),
      y = y_label,
      label = label, parse = label_parsed, size = label_size)
  }

  if (.log) {
    p <- p + scale_y_log10()
  }

  if (!is.null(color_species)) {
    p <- p +
      scale_fill_manual(values = color_species) +
      scale_color_manual(values = color_species)
  }

  return(p)


}
  
plot_temporal_population(com = ti, ribbon = FALSE)

p <- plot_temporal_population(com = ti, ribbon = TRUE)

GeomRibbon$handle_na <- function(data, params) {  data }
p$data %>%
  ggplot(
    aes(y = abundance, ymin = ymin, ymax = ymax, x = year,
      fill = species)
    ) +
  geom_ribbon()
set.seed(1)

test <- data.frame(x = rep(1:10, 3), y = abs(rnorm(30)), z = rep(LETTERS[1:3],
    10)) %>% arrange(x, z)

test[test$x == 4, "y"] <- NA

test$ymax <- test$y
test$ymin <- 0
zl <- unique(test$z)
for (i in 2:length(zl)) {
    zi <- test$z == zl[i]
    zi_1 <- test$z == zl[i - 1]
    test$ymin[zi] <- test$ymax[zi_1]
    test$ymax[zi] <- test$ymin[zi] + test$ymax[zi]
}


# fix GeomRibbon
GeomRibbon$handle_na <- function(data, params) {  data }

ggplot(test, aes(x = x, y=y, ymax = ymax, ymin = ymin, fill = z)) +
  geom_ribbon()
toy_dataset %>%
  group_by(siteid, year, species) %>%
  summarise(test=n()>1) %>%
  filter(test)
pop_trends <- toy_dataset %>%
  filter(!siteid %in% c("S534", "S8633")) %>%
  group_by(siteid) %>%
  nest() %>%
  mutate(
    p_pop = map(data, ~plot_temporal_population(com = .x, ))
  )
plot_grid(plotlist = pop_trends$p_pop)
#> Warning: Removed 280 rows containing missing values (position_stack).
#> Warning: Removed 104 rows containing missing values (position_stack).

0.4 Analysis

0.5 Reproducibility

Reproducibility receipt

## datetime
Sys.time()
#> [1] "2021-12-07 16:03:19 CST"

## repository
if(requireNamespace('git2r', quietly = TRUE)) {
  git2r::repository()
} else {
  c(
    system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
    system2("git", args = c("remote", "-v"), stdout = TRUE)
  )
}
#> Local:    main /home/alain/Documents/post-these/isu/RivFishTimeBiodiversityFacets
#> Head:     [861e78b] 2021-12-07: Raw data vizualisation

## session info
sessionInfo()
#> R version 4.0.5 (2021-03-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Debian GNU/Linux 10 (buster)
#> 
#> Matrix products: default
#> BLAS:   /home/alain/.Renv/versions/4.0.5/lib/R/lib/libRblas.so
#> LAPACK: /home/alain/.Renv/versions/4.0.5/lib/R/lib/libRlapack.so
#> 
#> locale:
#>  [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
#>  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
#>  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] cowplot_1.1.1           rnaturalearthdata_0.1.0 rnaturalearth_0.1.0    
#>  [4] mapview_2.10.0          sf_0.9-7                rmarkdown_2.11         
#>  [7] scales_1.1.1            kableExtra_1.3.1        here_1.0.1             
#> [10] magrittr_2.0.1          forcats_0.5.1           stringr_1.4.0          
#> [13] dplyr_1.0.4             purrr_0.3.4             readr_2.1.1            
#> [16] tidyr_1.1.2             tibble_3.1.6            ggplot2_3.3.3          
#> [19] tidyverse_1.3.0         tarchetypes_0.3.2       targets_0.8.1          
#> [22] conflicted_1.1.0        nvimcom_0.9-122        
#> 
#> loaded via a namespace (and not attached):
#>  [1] leafem_0.1.6       colorspace_2.0-0   ellipsis_0.3.2     class_7.3-18      
#>  [5] leaflet_2.0.4.1    rprojroot_2.0.2    satellite_1.0.4    base64enc_0.1-3   
#>  [9] fs_1.5.1           rstudioapi_0.13    farver_2.0.3       ggrepel_0.9.1     
#> [13] fansi_0.5.0        lubridate_1.7.9.2  xml2_1.3.2         codetools_0.2-18  
#> [17] splines_4.0.5      cachem_1.0.4       knitr_1.36         jsonlite_1.7.2    
#> [21] broom_0.7.4        dbplyr_2.1.0       png_0.1-7          compiler_4.0.5    
#> [25] httr_1.4.2         backports_1.2.1    assertthat_0.2.1   Matrix_1.3-2      
#> [29] fastmap_1.1.0      cli_3.1.0          htmltools_0.5.1.1  tools_4.0.5       
#> [33] igraph_1.2.6       gtable_0.3.0       glue_1.5.1         Rcpp_1.0.6        
#> [37] cellranger_1.1.0   jquerylib_0.1.3    raster_3.4-5       vctrs_0.3.8       
#> [41] nlme_3.1-152       crosstalk_1.1.1    xfun_0.28          ps_1.6.0          
#> [45] rvest_0.3.6        lifecycle_1.0.1    hms_1.1.1          yaml_2.2.1        
#> [49] memoise_2.0.0      sass_0.3.1         stringi_1.7.6      highr_0.9         
#> [53] e1071_1.7-4        rlang_0.4.12       pkgconfig_2.0.3    evaluate_0.14     
#> [57] lattice_0.20-41    htmlwidgets_1.5.3  labeling_0.4.2     processx_3.5.2    
#> [61] tidyselect_1.1.1   bookdown_0.24      R6_2.5.1           generics_0.1.0    
#> [65] DBI_1.1.1          pillar_1.6.4       haven_2.3.1        withr_2.4.3       
#> [69] mgcv_1.8-34        units_0.6-7        sp_1.4-5           modelr_0.1.8      
#> [73] crayon_1.4.2       KernSmooth_2.23-18 utf8_1.2.2         tzdb_0.2.0        
#> [77] grid_4.0.5         readxl_1.3.1       data.table_1.13.6  git2r_0.29.0      
#> [81] callr_3.7.0        reprex_1.0.0       digest_0.6.27      classInt_0.4-3    
#> [85] webshot_0.5.2      stats4_4.0.5       munsell_0.5.0      viridisLite_0.3.0 
#> [89] bslib_0.2.4